msg_tool\scripts\bgi\archive/
dsc.rs1use crate::ext::io::*;
3use crate::ext::vec::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::bit_stream::*;
7use crate::utils::num_range::*;
8use anyhow::Result;
9use rand::Rng;
10use std::collections::BinaryHeap;
11use std::io::{Seek, Write};
12
13#[derive(Debug)]
14struct HuffmanCode {
15 code: u16,
16 depth: u8,
17}
18
19impl std::cmp::PartialEq for HuffmanCode {
20 fn eq(&self, other: &Self) -> bool {
21 self.code == other.code && self.depth == other.depth
22 }
23}
24
25impl std::cmp::Eq for HuffmanCode {}
26
27impl std::cmp::PartialOrd for HuffmanCode {
28 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
29 let cmp = self.depth.cmp(&other.depth);
30 if cmp == std::cmp::Ordering::Equal {
31 Some(self.code.cmp(&other.code))
32 } else {
33 Some(cmp)
34 }
35 }
36}
37
38impl std::cmp::Ord for HuffmanCode {
39 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
40 let cmp = self.depth.cmp(&other.depth);
41 if cmp == std::cmp::Ordering::Equal {
42 self.code.cmp(&other.code)
43 } else {
44 cmp
45 }
46 }
47}
48
49#[derive(Clone, Debug)]
50struct HuffmanNode {
51 is_parent: bool,
52 code: Option<u16>,
53 left_index: usize,
54 right_index: usize,
55}
56
57pub struct DscDecoder<'a> {
59 stream: MsbBitStream<MemReaderRef<'a>>,
60 key: u32,
61 magic: u32,
62 output_size: u32,
63 dec_count: u32,
64}
65
66impl<'a> DscDecoder<'a> {
67 pub fn new(data: &'a [u8]) -> Result<Self> {
69 let mut reader = MemReaderRef::new(data);
70 let magic = (reader.read_u16()? as u32) << 16;
71 reader.pos = 0x10;
72 let key = reader.read_u32()?;
73 let output_size = reader.read_u32()?;
74 let dec_count = reader.read_u32()?;
75 let stream = MsbBitStream::new(reader);
76 Ok(DscDecoder {
77 stream,
78 key,
79 magic,
80 output_size,
81 dec_count,
82 })
83 }
84
85 pub fn unpack(mut self) -> Result<Vec<u8>> {
87 self.stream.m_input.pos = 0x20;
88 let mut codes = Vec::new();
89 for i in 0..512 {
90 let src = self.stream.m_input.read_u8()?;
91 let depth = src.overflowing_sub(self.update_key()).0;
92 if depth > 0 {
93 codes.push(HuffmanCode { code: i, depth })
94 }
95 }
96 codes.sort();
97 let root = Self::create_huffman_tree(codes);
98 self.huffman_decompress(root)
99 }
100
101 fn create_huffman_tree(codes: Vec<HuffmanCode>) -> Vec<HuffmanNode> {
102 let mut trees = Vec::with_capacity(1024);
103 trees.resize(
104 1024,
105 HuffmanNode {
106 is_parent: false,
107 code: None,
108 left_index: 0,
109 right_index: 0,
110 },
111 );
112 let mut left_index = vec![0usize; 512];
113 let mut right_index = vec![0usize; 512];
114 let mut next_node_index = 1usize;
115 let mut depth_nodes = 1usize;
116 let mut depth = 0u8;
117 let mut left_child = true;
118 let mut n = 0;
119 while n < codes.len() {
120 let huffman_node_index = left_child;
121 left_child = !left_child;
122 let mut depth_existed_nodes = 0;
123 while n < codes.len() && codes[n].depth == depth {
124 let index = if huffman_node_index {
125 left_index[depth_existed_nodes]
126 } else {
127 right_index[depth_existed_nodes]
128 };
129 trees[index].code = Some(codes[n].code);
130 n += 1;
131 depth_existed_nodes += 1;
132 }
133 let depth_nodes_to_create = depth_nodes - depth_existed_nodes;
134 for i in 0..depth_nodes_to_create {
135 let index = if huffman_node_index {
136 left_index[depth_existed_nodes + i]
137 } else {
138 right_index[depth_existed_nodes + i]
139 };
140 let node = &mut trees[index];
141 node.is_parent = true;
142 if left_child {
143 left_index[i * 2] = next_node_index;
144 node.left_index = next_node_index;
145 next_node_index += 1;
146 left_index[i * 2 + 1] = next_node_index;
147 node.right_index = next_node_index;
148 next_node_index += 1;
149 } else {
150 right_index[i * 2] = next_node_index;
151 node.left_index = next_node_index;
152 next_node_index += 1;
153 right_index[i * 2 + 1] = next_node_index;
154 node.right_index = next_node_index;
155 next_node_index += 1;
156 }
157 }
158 depth += 1;
159 depth_nodes = depth_nodes_to_create * 2;
160 }
161 trees
162 }
163
164 fn huffman_decompress(&mut self, nodes: Vec<HuffmanNode>) -> Result<Vec<u8>> {
165 let output_size = self.output_size as usize;
166 let mut output = Vec::with_capacity(output_size);
167 let mut dst = 0;
168 output.resize(output_size, 0);
169 for _ in 0..self.dec_count {
170 let mut current_node = &nodes[0];
171 loop {
172 let bit = self.stream.get_next_bit()?;
173 if !bit {
174 current_node = &nodes[current_node.left_index]
175 } else {
176 current_node = &nodes[current_node.right_index]
177 }
178 if !current_node.is_parent {
179 break;
180 }
181 }
182 let code = *current_node.code.as_ref().unwrap();
183 if code >= 256 {
184 let mut offset = self.stream.get_bits(12)?;
185 let count = ((code & 0xFF) + 2) as usize;
186 offset += 2;
187 output.copy_overlapped(dst - offset as usize, dst, count);
188 dst += count;
189 } else {
190 output[dst] = code as u8;
191 dst += 1;
192 }
193 }
194 if dst != output_size {
195 eprintln!(
196 "Warning: Output size mismatch, expected {}, got {}",
197 self.output_size, dst
198 );
199 crate::COUNTER.inc_warning();
200 }
201 Ok(output)
202 }
203
204 fn update_key(&mut self) -> u8 {
205 let v0 = 20021 * (self.key & 0xffff);
206 let mut v1 = self.magic | (self.key >> 16);
207 v1 = v1
208 .overflowing_mul(20021)
209 .0
210 .overflowing_add(self.key.overflowing_mul(346).0)
211 .0;
212 v1 = (v1 + (v0 >> 16)) & 0xffff;
213 self.key = (v1 << 16) + (v0 & 0xffff) + 1;
214 v1 as u8
215 }
216}
217
218#[derive(Debug, Clone, Copy)]
219enum LzssOp {
220 Literal(u8),
221 Match { len: u16, offset: u16 },
222}
223
224#[derive(Debug)]
225struct FreqNode {
226 freq: u32,
227 symbol: Option<u16>,
228 left: Option<Box<FreqNode>>,
229 right: Option<Box<FreqNode>>,
230}
231impl PartialEq for FreqNode {
232 fn eq(&self, other: &Self) -> bool {
233 self.freq == other.freq
234 }
235}
236impl Eq for FreqNode {}
237impl PartialOrd for FreqNode {
238 fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
239 Some(self.cmp(other))
240 }
241}
242impl Ord for FreqNode {
243 fn cmp(&self, other: &Self) -> std::cmp::Ordering {
244 other.freq.cmp(&self.freq)
245 }
246}
247
248fn calculate_huffman_depths(freqs: &[u32]) -> Vec<u8> {
249 const MAX_DEPTH: u8 = 9;
250
251 let mut symbols_with_freq: Vec<(u16, u32)> = freqs
253 .iter()
254 .enumerate()
255 .filter_map(|(symbol, &freq)| {
256 if freq > 0 {
257 Some((symbol as u16, freq))
258 } else {
259 None
260 }
261 })
262 .collect();
263
264 let mut depths = vec![0u8; 512];
265
266 if symbols_with_freq.is_empty() {
267 return depths;
268 }
269
270 if symbols_with_freq.len() == 1 {
271 depths[symbols_with_freq[0].0 as usize] = 1;
272 return depths;
273 }
274
275 loop {
277 let current_depths = build_huffman_tree(&symbols_with_freq);
278 let max_depth = current_depths.iter().max().copied().unwrap_or(0);
279
280 if max_depth <= MAX_DEPTH {
281 for &(symbol, _) in &symbols_with_freq {
283 let symbol_index = symbols_with_freq
284 .iter()
285 .position(|(s, _)| *s == symbol)
286 .unwrap();
287 depths[symbol as usize] = current_depths[symbol_index];
288 }
289 break;
290 }
291
292 adjust_frequencies_for_depth_limit(&mut symbols_with_freq);
294 }
295
296 depths
297}
298
299fn build_huffman_tree(symbols_with_freq: &[(u16, u32)]) -> Vec<u8> {
300 let mut heap = BinaryHeap::new();
301
302 for &(symbol, freq) in symbols_with_freq {
304 heap.push(FreqNode {
305 freq,
306 symbol: Some(symbol),
307 left: None,
308 right: None,
309 });
310 }
311
312 while heap.len() > 1 {
314 let node1 = heap.pop().unwrap();
315 let node2 = heap.pop().unwrap();
316 let new_node = FreqNode {
317 freq: node1.freq + node2.freq,
318 symbol: None,
319 left: Some(Box::new(node1)),
320 right: Some(Box::new(node2)),
321 };
322 heap.push(new_node);
323 }
324
325 let mut depths = vec![0u8; symbols_with_freq.len()];
327 if let Some(root) = heap.pop() {
328 calculate_depths(&root, 0, symbols_with_freq, &mut depths);
329 }
330
331 depths
332}
333
334fn calculate_depths(
335 node: &FreqNode,
336 depth: u8,
337 symbols_with_freq: &[(u16, u32)],
338 depths: &mut [u8],
339) {
340 if let Some(symbol) = node.symbol {
341 let symbol_index = symbols_with_freq
342 .iter()
343 .position(|(s, _)| *s == symbol)
344 .unwrap();
345 depths[symbol_index] = if depth == 0 { 1 } else { depth };
346 } else {
347 if let Some(ref left) = node.left {
348 calculate_depths(left, depth + 1, symbols_with_freq, depths);
349 }
350 if let Some(ref right) = node.right {
351 calculate_depths(right, depth + 1, symbols_with_freq, depths);
352 }
353 }
354}
355
356fn adjust_frequencies_for_depth_limit(symbols_with_freq: &mut [(u16, u32)]) {
357 symbols_with_freq.sort_by(|a, b| a.1.cmp(&b.1));
359
360 let min_freq = symbols_with_freq[0].1;
363 let adjustment = (min_freq as f64 * 0.1).max(1.0) as u32;
364
365 let num_to_adjust = (symbols_with_freq.len() / 4).max(1);
367 for i in 0..num_to_adjust.min(symbols_with_freq.len()) {
368 symbols_with_freq[i].1 += adjustment;
369 }
370}
371
372fn generate_canonical_codes(depths: &[u8]) -> Vec<Option<(u16, u8)>> {
373 let mut codes_with_depths = vec![];
374 for (symbol, &depth) in depths.iter().enumerate() {
375 if depth > 0 {
376 codes_with_depths.push((symbol as u16, depth));
377 }
378 }
379 codes_with_depths.sort_by(|a, b| {
380 let depth_cmp = a.1.cmp(&b.1);
381 if depth_cmp == std::cmp::Ordering::Equal {
382 a.0.cmp(&b.0)
383 } else {
384 depth_cmp
385 }
386 });
387
388 let mut huffman_codes = vec![None; 512];
389 let mut current_code = 0u16;
390 let mut last_depth = 0u8;
391
392 for &(symbol, depth) in &codes_with_depths {
393 if last_depth != 0 {
394 current_code <<= depth - last_depth;
395 }
396 huffman_codes[symbol as usize] = Some((current_code, depth));
397 current_code += 1;
398 last_depth = depth;
399 }
400
401 huffman_codes
402}
403
404pub struct DscEncoder<'a, T: Write + Seek> {
406 stream: MsbBitWriter<'a, T>,
407 magic: u32,
408 key: u32,
409 dec_count: u32,
410 min_len: usize,
411}
412
413impl<'a, T: Write + Seek> DscEncoder<'a, T> {
414 pub fn new(writer: &'a mut T, min_len: usize) -> Self {
416 let stream = MsbBitWriter::new(writer);
417 DscEncoder {
418 stream,
419 magic: 0x5344 << 16, key: rand::rng().random(),
421 dec_count: 0,
422 min_len,
423 }
424 }
425
426 pub fn pack(mut self, data: &[u8]) -> Result<()> {
428 let mut ops = vec![];
430 let mut pos = 0;
431
432 const MAX_LEN: usize = 257;
433 const WINDOW_SIZE: usize = 4097;
434
435 let mut head: Vec<i32> = vec![-1; 1 << 16];
436 let mut prev: Vec<i32> = vec![-1; data.len()];
437
438 while pos < data.len() {
439 let max_len = (data.len() - pos).min(MAX_LEN);
440 let mut best_len = 0;
441 let mut best_offset = 0;
442
443 if max_len >= self.min_len {
444 let limit = pos.saturating_sub(WINDOW_SIZE);
445 let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
446 let mut match_pos_i32 = head[key as usize];
447
448 while match_pos_i32 != -1 {
449 let match_pos = match_pos_i32 as usize;
450 if match_pos < limit {
451 break;
452 }
453
454 if data.get(match_pos + best_len) == data.get(pos + best_len) {
455 let mut current_len = 0;
456 for i in 0..max_len {
457 if data.get(pos + i) != data.get(match_pos + i) {
458 break;
459 }
460 current_len += 1;
461 }
462
463 if current_len > best_len {
464 best_len = current_len;
465 best_offset = pos - match_pos;
466 if best_len >= max_len {
467 break;
468 }
469 }
470 }
471 match_pos_i32 = prev[match_pos];
472 }
473 }
474
475 if best_len >= self.min_len && best_offset >= 2 {
476 ops.push(LzssOp::Match {
477 len: best_len as u16,
478 offset: best_offset as u16,
479 });
480 for i in 0..best_len {
481 if pos + i + 1 < data.len() {
482 let key = (data[pos + i] as u16) << 8 | data[pos + i + 1] as u16;
483 let current_pos = pos + i;
484 prev[current_pos] = head[key as usize];
485 head[key as usize] = current_pos as i32;
486 }
487 }
488 pos += best_len;
489 } else {
490 ops.push(LzssOp::Literal(data[pos]));
491 if pos + 1 < data.len() {
492 let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
493 prev[pos] = head[key as usize];
494 head[key as usize] = pos as i32;
495 }
496 pos += 1;
497 }
498 }
499
500 let symbols: Vec<u16> = ops
501 .iter()
502 .map(|op| match op {
503 LzssOp::Literal(byte) => *byte as u16,
504 LzssOp::Match { len, .. } => 256 + (len - 2),
505 })
506 .collect();
507 self.dec_count = symbols.len() as u32;
508
509 let mut freqs = vec![0u32; 512];
510 for &s in &symbols {
511 freqs[s as usize] += 1;
512 }
513
514 let depths = calculate_huffman_depths(&freqs);
515 let huffman_codes = generate_canonical_codes(&depths);
516
517 self.stream.writer.write_all(b"DSC FORMAT 1.00\0")?;
518 self.stream.writer.seek(std::io::SeekFrom::Start(0x10))?;
519 self.stream.writer.write_u32(self.key)?;
520 self.stream.writer.write_u32(data.len() as u32)?;
521 self.stream.writer.write_u32(self.dec_count)?;
522 self.stream.writer.seek(std::io::SeekFrom::Start(0x20))?;
523
524 for depth in depths.iter() {
525 let key = self.update_key();
526 self.stream.writer.write_u8(depth.overflowing_add(key).0)?;
527 }
528
529 for op in &ops {
530 match op {
531 LzssOp::Literal(byte) => {
532 let symbol = *byte as u16;
533 let (code, len) = huffman_codes[symbol as usize].unwrap();
534 self.stream.put_bits(code as u32, len)?;
535 }
536 LzssOp::Match { len, offset } => {
537 let symbol = 256 + (len - 2);
538 let (code, huff_len) = huffman_codes[symbol as usize].unwrap();
539 self.stream.put_bits(code as u32, huff_len)?;
540 self.stream.put_bits((*offset - 2) as u32, 12)?;
541 }
542 }
543 }
544 self.stream.flush()?;
545 Ok(())
546 }
547
548 fn update_key(&mut self) -> u8 {
549 let v0 = 20021 * (self.key & 0xffff);
550 let mut v1 = self.magic | (self.key >> 16);
551 v1 = v1
552 .overflowing_mul(20021)
553 .0
554 .overflowing_add(self.key.overflowing_mul(346).0)
555 .0;
556 v1 = (v1 + (v0 >> 16)) & 0xffff;
557 self.key = (v1 << 16) + (v0 & 0xffff) + 1;
558 v1 as u8
559 }
560}
561
562#[derive(Debug)]
563pub struct DscBuilder {}
565
566impl DscBuilder {
567 pub fn new() -> Self {
569 DscBuilder {}
570 }
571}
572
573impl ScriptBuilder for DscBuilder {
574 fn default_encoding(&self) -> Encoding {
575 Encoding::Cp932
576 }
577
578 fn default_archive_encoding(&self) -> Option<Encoding> {
579 Some(Encoding::Cp932)
580 }
581
582 fn build_script(
583 &self,
584 buf: Vec<u8>,
585 _filename: &str,
586 _encoding: Encoding,
587 _archive_encoding: Encoding,
588 config: &ExtraConfig,
589 _archive: Option<&Box<dyn Script>>,
590 ) -> Result<Box<dyn Script>> {
591 Ok(Box::new(Dsc::new(buf, config)?))
592 }
593
594 fn extensions(&self) -> &'static [&'static str] {
595 &[]
596 }
597
598 fn script_type(&self) -> &'static ScriptType {
599 &ScriptType::BGIDsc
600 }
601
602 fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
603 if buf_len >= 16 && buf.starts_with(b"DSC FORMAT 1.00\0") {
604 return Some(255);
605 }
606 None
607 }
608
609 fn can_create_file(&self) -> bool {
610 true
611 }
612
613 fn create_file<'a>(
614 &'a self,
615 filename: &'a str,
616 mut writer: Box<dyn WriteSeek + 'a>,
617 _encoding: Encoding,
618 _file_encoding: Encoding,
619 config: &ExtraConfig,
620 ) -> Result<()> {
621 let encoder = DscEncoder::new(&mut writer, config.bgi_compress_min_len);
622 let data = crate::utils::files::read_file(filename)?;
623 encoder.pack(&data)?;
624 Ok(())
625 }
626}
627
628#[derive(Debug)]
629pub struct Dsc {
631 data: Vec<u8>,
632 min_len: usize,
633}
634
635impl Dsc {
636 pub fn new(buf: Vec<u8>, config: &ExtraConfig) -> Result<Self> {
641 if buf.len() < 16 || !buf.starts_with(b"DSC FORMAT 1.00\0") {
642 return Err(anyhow::anyhow!("Invalid DSC format"));
643 }
644 let decoder = DscDecoder::new(&buf)?;
645 let data = decoder.unpack()?;
646 Ok(Dsc {
647 data,
648 min_len: config.bgi_compress_min_len,
649 })
650 }
651}
652
653impl Script for Dsc {
654 fn default_output_script_type(&self) -> OutputScriptType {
655 OutputScriptType::Custom
656 }
657
658 fn is_output_supported(&self, output: OutputScriptType) -> bool {
659 matches!(output, OutputScriptType::Custom)
660 }
661
662 fn default_format_type(&self) -> FormatOptions {
663 FormatOptions::None
664 }
665
666 fn custom_output_extension(&self) -> &'static str {
667 ""
668 }
669
670 fn custom_export(&self, filename: &std::path::Path, _encoding: Encoding) -> Result<()> {
671 let mut f = std::fs::File::create(filename)?;
672 f.write_all(&self.data)?;
673 Ok(())
674 }
675
676 fn custom_import<'a>(
677 &'a self,
678 custom_filename: &'a str,
679 mut file: Box<dyn WriteSeek + 'a>,
680 _encoding: Encoding,
681 _output_encoding: Encoding,
682 ) -> Result<()> {
683 let encoder = DscEncoder::new(&mut file, self.min_len);
684 let data = crate::utils::files::read_file(custom_filename)?;
685 encoder.pack(&data)?;
686 Ok(())
687 }
688}
689
690pub fn parse_min_length(len: &str) -> Result<usize, String> {
692 number_range(len, 2, 256)
693}